from docx import Document
import jieba
from collections import Counter
import pandas as pd

# Word-frequency report: read a Word document, segment its text with jieba,
# count the resulting words, print them, and export the counts to Excel.

# Open the Word document.
# NOTE(review): '文档路径' ("document path") looks like a placeholder - replace
# it with the real .docx path before running.
doc = Document('文档路径')

# Join paragraphs with a newline instead of an empty string, so that the last
# characters of one paragraph and the first characters of the next are never
# adjacent and cannot be segmented together into a word that is not actually
# in the document. The newline tokens themselves are dropped by the filter
# below.
content = "\n".join(para.text for para in doc.paragraphs)

# cut_all=False selects jieba's non-full (precise) segmentation mode.
seq_list = jieba.cut(content, cut_all=False)

# Keep only tokens of two or more characters, and skip whitespace-only tokens
# so that runs of blanks can never be counted as a "word".
seq_filtered = [
    word for word in seq_list
    if len(word) >= 2 and not word.isspace()
]

counter = Counter(seq_filtered)

# Echo every word with its count, in first-seen order.
for key, count in counter.items():
    print(key, count)

# Export the counts, most frequent first. A stable sort keeps the order of
# words with equal counts deterministic from run to run.
df = pd.DataFrame(list(counter.items()), columns=['词语', '出现次数'])
df.sort_values(by='出现次数', ascending=False, inplace=True, kind='stable')
df.to_excel('词频统计.xlsx', index=False)
